Main Figures
Have a legend for all plots
Since we have set a parameter in our set_colors.R file that we source at the beginning and use this throughout, we can assure that the below legends will represent the legends called afterwards.
####### Legend for lakesite
legend_plot <- ggplot(metadata, aes(y = frac_bacprod, x = fraction, fill = fraction, shape = season)) +
geom_jitter(size = 3, width = 0.2) +
scale_fill_manual(values = fraction_colors, guide = guide_legend(override.aes = list(shape = 22, size = 4))) +
scale_shape_manual(values = season_shapes, guide = guide_legend(override.aes = list(size = 4))) +
theme(legend.position = "bottom", axis.title.x = element_blank(),
legend.title = element_blank())
# Extract the legend
season_legend <- get_legend(legend_plot)
####### Legend for lakesite
lakesite_legend <- ggplot(metadata, aes(y = frac_bacprod, x = fraction, fill = fraction, shape = lakesite)) +
geom_jitter(size = 3, width = 0.2) +
scale_fill_manual(values = fraction_colors, guide = guide_legend(override.aes = list(shape = 22, size = 4))) +
scale_shape_manual(values = lakesite_shapes, guide = guide_legend(override.aes = list(size = 4))) +
theme(legend.position = "bottom", axis.title.x = element_blank(),
legend.title = element_blank())
# Extract the legend
lakesite_legend <- get_legend(lakesite_legend)
Figure 1
######################################################### Fraction ABUNDANCe
frac_abund_wilcox <- wilcox.test(log10(as.numeric(fraction_bac_abund)) ~ fraction, data = metadata)
frac_abund_wilcox
##
## Wilcoxon rank sum test
##
## data: log10(as.numeric(fraction_bac_abund)) by fraction
## W = 0, p-value = 1.479e-06
## alternative hypothesis: true location shift is not equal to 0
metadata %>%
group_by(fraction) %>%
summarize(mean(as.numeric(fraction_bac_abund)))
## # A tibble: 2 x 2
## fraction `mean(as.numeric(fraction_bac_abund))`
## <fct> <dbl>
## 1 Particle 41169.
## 2 Free 734522.
# Make a data frame to draw significance line in boxplot (visually calculated)
dat1 <- data.frame(a = c(1.1,1.1,1.9,1.9), b = c(6.45,6.5,6.5,6.45)) # WholePart vs WholeFree
poster_a <-
ggplot(filter(metadata, norep_filter_name != "MOTEJ515"),
aes(y = log10(as.numeric(fraction_bac_abund)), x = fraction)) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes( fill = fraction)) +
ylab("Log10(Bacterial Counts) \n (cells/mL)") +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
##### Particle vs free cell abundances
geom_path(data = dat1, aes(x = a, y = b), linetype = 1, color = "gray40") +
annotate("text", x=1.5, y=6.55, size = 8, color = "gray40", label= paste("***")) +
annotate("text", x=1.5, y=6.4, size = 3.5, color = "gray40",
label= paste("p =", round(frac_abund_wilcox$p.value, digits = 6))) +
theme(legend.position = "bottom",
legend.title = element_blank(),
axis.title.x = element_blank())
######################################################### TOTAL PRODUCTION
totprod_wilcox <- wilcox.test(frac_bacprod ~ fraction, data = metadata)
totprod_wilcox
##
## Wilcoxon rank sum test
##
## data: frac_bacprod by fraction
## W = 33, p-value = 0.02418
## alternative hypothesis: true location shift is not equal to 0
metadata %>%
group_by(fraction) %>%
summarize(mean(frac_bacprod))
## # A tibble: 2 x 2
## fraction `mean(frac_bacprod)`
## <fct> <dbl>
## 1 Particle 9.96
## 2 Free 24.1
# Make a data frame to draw significance line in boxplot (visually calculated)
dat2 <- data.frame(a = c(1.1,1.1,1.9,1.9), b = c(71,72,72,71)) # WholePart vs WholeFree
poster_b <- ggplot(metadata, aes(y = frac_bacprod, x = fraction)) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes( fill = fraction)) +
scale_fill_manual(values = fraction_colors, guide = FALSE) +
scale_shape_manual(values = season_shapes) +
scale_y_continuous(limits = c(0, 78), expand = c(0,0), breaks = seq(0, 75, by = 15)) +
ylab("Community Production \n (μgC/L/day)") +
##### Particle vs free bulk production
geom_path(data = dat2, aes(x = a, y = b), linetype = 1, color = "gray40") +
annotate("text", x=1.5, y=73, size = 8, color = "gray40", label= paste("*")) +
annotate("text", x=1.5, y=69, size = 3.5, color = "gray40",
label= paste("p =", round(totprod_wilcox$p.value, digits = 3))) +
theme(legend.position = "none",
axis.title.x = element_blank())
######################################################### TOTAL PRODUCTION
percellprod_wilcox <- wilcox.test(log10(fracprod_per_cell) ~ fraction, data = filter(metadata, norep_filter_name != "MOTEJ515"))
percellprod_wilcox
##
## Wilcoxon rank sum test
##
## data: log10(fracprod_per_cell) by fraction
## W = 125, p-value = 6.656e-05
## alternative hypothesis: true location shift is not equal to 0
filter(metadata, norep_filter_name != "MOTEJ515") %>%
group_by(fraction) %>%
summarize(mean(fracprod_per_cell))
## # A tibble: 2 x 2
## fraction `mean(fracprod_per_cell)`
## <fct> <dbl>
## 1 Particle 0.000000482
## 2 Free 0.0000000387
# Make a data frame to draw significance line in boxplot (visually calculated)
dat3 <- data.frame(a = c(1.1,1.1,1.9,1.9), b = c(-5.05,-5,-5,-5.05)) # WholePart vs WholeFree
poster_c <- ggplot(filter(metadata, norep_filter_name != "MOTEJ515"),
aes(y = log10(fracprod_per_cell), x = fraction)) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes( fill = fraction)) +
scale_fill_manual(values = fraction_colors, guide = FALSE) +
scale_shape_manual(values = season_shapes) +
ylim(c(-8.5, -4.9)) +
ylab("Log10(Per-Capita Production) \n(μgC/cell/day)") +
##### Particle vs free per-cell production
geom_path(data = dat3, aes(x = a, y = b), linetype = 1, color = "gray40") +
annotate("text", x=1.5, y=-4.95, size = 8, color = "gray40", label= paste("***")) +
annotate("text", x=1.5, y=-5.15, size = 3.5, color = "gray40",
label= paste("p =", round(percellprod_wilcox$p.value, digits = 5))) +
theme(legend.position = "none",
axis.title.x = element_blank())
######## FIGURE 1
# legend
legend <- get_legend(poster_a)
row1_plots <- plot_grid(poster_a + theme(legend.position = "none"), poster_b, poster_c,
labels = c("A", "B", "C"),
ncol = 3, nrow = 1)
fig_1 <- plot_grid(row1_plots, season_legend,
ncol = 1, nrow = 2,
rel_heights = c(1, 0.05)); fig_1
## Change the theme to be minimal
prezzzz <- plot_grid(poster_a + theme_minimal() + theme(legend.position = "none"),
poster_b + theme_minimal() + theme(legend.position = "none"),
poster_c + theme_minimal() + theme(legend.position = "none"),
labels = c("A", "B", "C"),
ncol = 3, nrow = 1)
fig_1_minimal <- plot_grid(prezzzz, season_legend,
ncol = 1, nrow = 2,
rel_heights = c(1, 0.05)); fig_1_minimal
Linear Regressions with Fraction Diversity
# Free-Living samples only
div_measures_free <- otu_alphadiv %>%
dplyr::select(fraction, mean, measure, frac_bacprod, fracprod_per_cell_noinf) %>%
filter(fraction == "Free") %>%
dplyr::select(-fraction)
# Particle-associated samples only
div_measures_part <- otu_alphadiv %>%
dplyr::select(fraction, mean, measure, frac_bacprod, fracprod_per_cell_noinf) %>%
filter(fraction == "Particle") %>%
dplyr::select(-fraction)
## All of the samples!
div_measures_all <- otu_alphadiv %>%
dplyr::select(mean, measure, frac_bacprod, fracprod_per_cell_noinf)
########################################################
############## Particle: Community-wide
lm_divs_particle_comm <- div_measures_part %>%
dplyr::select(-fracprod_per_cell_noinf) %>%
group_by(measure) %>%
do(glance(lm(frac_bacprod ~ mean, data = .))) %>%
mutate(fraction = "Particle", dependent = "Community Production") %>%
dplyr::select(fraction, dependent, measure, AIC, adj.r.squared, p.value) %>%
ungroup() %>%
mutate(FDR.p = p.adjust(p.value, method = "fdr"))
############## Particle: Community-wide
lm_divs_particle_percap <- div_measures_part %>%
dplyr::select(-frac_bacprod) %>%
group_by(measure) %>%
do(glance(lm(log10(fracprod_per_cell_noinf) ~ mean, data = .))) %>%
mutate(fraction = "Particle", dependent = "Per-Capita Production") %>%
dplyr::select(fraction, dependent, measure, AIC, adj.r.squared, p.value) %>%
ungroup() %>%
mutate(FDR.p = p.adjust(p.value, method = "fdr"))
########################################################
############## Free: Community-wide
lm_divs_free_comm <- div_measures_free %>%
dplyr::select(-fracprod_per_cell_noinf) %>%
group_by(measure) %>%
do(glance(lm(frac_bacprod ~ mean, data = .))) %>%
mutate(fraction = "Free", dependent = "Community Production") %>%
dplyr::select(fraction, dependent, measure, AIC, adj.r.squared, p.value) %>%
ungroup() %>%
mutate(FDR.p = p.adjust(p.value, method = "fdr"))
############## Free: Community-wide
lm_divs_free_percap <- div_measures_free %>%
dplyr::select(-frac_bacprod) %>%
group_by(measure) %>%
do(glance(lm(log10(fracprod_per_cell_noinf) ~ mean, data = .))) %>%
mutate(fraction = "Free", dependent = "Per-Capita Production") %>%
dplyr::select(fraction, dependent, measure, AIC, adj.r.squared, p.value) %>%
ungroup() %>%
mutate(FDR.p = p.adjust(p.value, method = "fdr"))
########################################################
############## All Samples: Community-wide
lm_divs_all_comm <- div_measures_all %>%
dplyr::select(-fracprod_per_cell_noinf) %>%
group_by(measure) %>%
do(glance(lm(frac_bacprod ~ mean, data = .))) %>%
mutate(fraction = "All Samples", dependent = "Community Production") %>%
dplyr::select(fraction, dependent, measure, AIC, adj.r.squared, p.value) %>%
ungroup() %>%
mutate(FDR.p = p.adjust(p.value, method = "fdr"))
############## All Samples: Community-wide
lm_divs_all_percap <- div_measures_all %>%
dplyr::select(-frac_bacprod) %>%
group_by(measure) %>%
do(glance(lm(log10(fracprod_per_cell_noinf) ~ mean, data = .))) %>%
mutate(fraction = "All Samples", dependent = "Per-Capita Production") %>%
dplyr::select(fraction, dependent, measure, AIC, adj.r.squared, p.value) %>%
ungroup() %>%
mutate(FDR.p = p.adjust(p.value, method = "fdr"))
## Filtered PCA Linear Model Results
# Put all of the PCA and environmental linear model results together into one dataframe
lm_div_results <-
bind_rows(lm_divs_particle_comm, lm_divs_particle_percap,
lm_divs_free_comm, lm_divs_free_percap,
lm_divs_all_comm, lm_divs_all_percap) %>%
dplyr::rename(independent_var = measure,
dependent_var = dependent)
See supplemental table 1 header for the table output!
Cross Validation
Cross validation will provide a better estimate for the adjusted R-squared value of the diversity regressions.
#################################### Community-Wide Production ####################################
################### Richness ###################
lm_prod_vs_rich_PA <- lm(frac_bacprod ~ mean, data = filter(richness, fraction == "Particle"))
summary(lm(frac_bacprod ~ mean, data = filter(richness, fraction == "Particle")))
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(richness, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.1446 -2.8561 -0.4608 3.7354 9.9395
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -11.167033 5.731339 -1.948 0.07996 .
## mean 0.037941 0.009891 3.836 0.00329 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.499 on 10 degrees of freedom
## Multiple R-squared: 0.5954, Adjusted R-squared: 0.5549
## F-statistic: 14.72 on 1 and 10 DF, p-value: 0.003286
summary(lm(frac_bacprod ~ mean, data = filter(richness, fraction == "Free")))
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(richness, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.013 -11.090 -2.717 8.548 33.314
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.32029 21.71997 0.245 0.811
## mean 0.05540 0.06242 0.888 0.396
##
## Residual standard error: 17.7 on 10 degrees of freedom
## Multiple R-squared: 0.07303, Adjusted R-squared: -0.01966
## F-statistic: 0.7879 on 1 and 10 DF, p-value: 0.3956
# Without the 2 high samples
summary(lm(frac_bacprod ~ mean, data = filter(richness, fraction == "Particle" & mean < 700)))
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(richness, fraction ==
## "Particle" & mean < 700))
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.7760 -1.6847 -0.6523 2.8624 6.3591
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.43520 8.53971 -0.636 0.542
## mean 0.02559 0.01708 1.498 0.172
##
## Residual standard error: 4.365 on 8 degrees of freedom
## Multiple R-squared: 0.2191, Adjusted R-squared: 0.1215
## F-statistic: 2.244 on 1 and 8 DF, p-value: 0.1725
rich_test_df <-
richness %>%
mutate(frac_boolean = (fraction == "Particle")*1)
# Correlation of richness between PA and FL
rich_test_df_sorted <- arrange(rich_test_df, norep_water_name)
rich_test_df_sorted_PA <- filter(rich_test_df_sorted, fraction == "Particle")
rich_test_df_sorted_FL <- filter(rich_test_df_sorted, fraction == "Free")
cor.test(rich_test_df_sorted_PA$mean, rich_test_df_sorted_FL$mean)
##
## Pearson's product-moment correlation
##
## data: rich_test_df_sorted_PA$mean and rich_test_df_sorted_FL$mean
## t = 0.75172, df = 10, p-value = 0.4695
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.3950677 0.7108262
## sample estimates:
## cor
## 0.2312697
# Look into car::vif() for multicollinearity
cor(rich_test_df$mean, rich_test_df$frac_boolean)
## [1] 0.6510939
summary(lm(frac_bacprod ~ mean + frac_boolean, data = rich_test_df))
##
## Call:
## lm(formula = frac_bacprod ~ mean + frac_boolean, data = rich_test_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.449 -5.824 -1.118 5.647 34.871
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.00701 7.87588 1.271 0.21777
## mean 0.04155 0.02055 2.021 0.05616 .
## frac_boolean -23.18168 6.90002 -3.360 0.00297 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.83 on 21 degrees of freedom
## Multiple R-squared: 0.3506, Adjusted R-squared: 0.2887
## F-statistic: 5.668 on 2 and 21 DF, p-value: 0.01076
PA_rich_residplot <- plot_residuals(lm_prod_vs_rich_PA, filter(richness, fraction == "Particle")$frac_bacprod, "Community Production vs Particle-Associated Richness")
## [1] "There are no high leverage points in this model."
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_prod_vs_rich_PA <- train(
frac_bacprod ~ mean, data = filter(richness, fraction == "Particle"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_prod_vs_rich_PA$results # Particle Community-Wide CV results
## intercept RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 TRUE 6.761303 0.6544213 5.382886 2.14011 0.2954728 1.736207
################### Shannon Entropy ###################
lm_prod_vs_shannon_PA <- lm(frac_bacprod ~ mean, data = filter(shannon, fraction == "Particle"))
cv_lm_prod_vs_shannon_PA <- train(
frac_bacprod ~ mean, data = filter(shannon, fraction == "Particle"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_prod_vs_shannon_PA$results # Particle Community-Wide CV results
## intercept RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 TRUE 7.759682 0.7236966 5.788561 3.884461 0.2689809 2.736891
################### Inverse Simpson ###################
lm_prod_vs_invsimps_PA <- lm(frac_bacprod ~ mean, data = filter(invsimps, fraction == "Particle"))
summary(lm_prod_vs_invsimps_PA)
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(invsimps, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.5101 -2.0966 -0.1789 0.8421 7.8801
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.42551 2.43457 -0.175 0.864742
## mean 0.29241 0.05761 5.076 0.000481 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.572 on 10 degrees of freedom
## Multiple R-squared: 0.7204, Adjusted R-squared: 0.6924
## F-statistic: 25.76 on 1 and 10 DF, p-value: 0.0004809
# Without the 2 high samples
summary(lm(frac_bacprod ~ mean, data = filter(invsimps, fraction == "Particle" & mean < 70)))
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(invsimps, fraction ==
## "Particle" & mean < 70))
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.2949 -1.6336 -0.3733 1.4762 6.5363
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.6287 2.4758 0.658 0.5291
## mean 0.2050 0.0806 2.543 0.0345 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.672 on 8 degrees of freedom
## Multiple R-squared: 0.4471, Adjusted R-squared: 0.378
## F-statistic: 6.469 on 1 and 8 DF, p-value: 0.03452
PA_invsimps_residplot <- plot_residuals(lm_prod_vs_invsimps_PA, filter(invsimps, fraction == "Particle")$frac_bacprod, "Community Production vs Particle-Associated Inverse Simpson")
## [1] "There are no high leverage points in this model."
invsimps_test_df <-
invsimps %>%
mutate(frac_boolean = (fraction == "Particle")*1)
cor(invsimps_test_df$mean, invsimps_test_df$frac_boolean)
## [1] 0.3171016
summary(lm(frac_bacprod ~ mean + frac_boolean, data = invsimps_test_df))
##
## Call:
## lm(formula = frac_bacprod ~ mean + frac_boolean, data = invsimps_test_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.336 -5.979 -0.144 2.625 37.405
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.3727 5.1919 3.154 0.00479 **
## mean 0.3193 0.1521 2.099 0.04813 *
## frac_boolean -17.7516 5.4875 -3.235 0.00397 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.75 on 21 degrees of freedom
## Multiple R-squared: 0.3587, Adjusted R-squared: 0.2976
## F-statistic: 5.872 on 2 and 21 DF, p-value: 0.009428
# Correlation of inverse Simpson's index between PA and FL
invsimps_test_df_sorted <- arrange(invsimps_test_df, norep_water_name)
invsimps_test_df_sorted_PA <- filter(invsimps_test_df_sorted, fraction == "Particle")
invsimps_test_df_sorted_FL <- filter(invsimps_test_df_sorted, fraction == "Free")
cor.test(invsimps_test_df_sorted_PA$mean, invsimps_test_df_sorted_FL$mean)
##
## Pearson's product-moment correlation
##
## data: invsimps_test_df_sorted_PA$mean and invsimps_test_df_sorted_FL$mean
## t = 1.7221, df = 10, p-value = 0.1158
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1318285 0.8255638
## sample estimates:
## cor
## 0.4782564
cv_lm_prod_vs_invsimps_PA <- train(
frac_bacprod ~ mean, data = filter(invsimps, fraction == "Particle"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_prod_vs_invsimps_PA$results # Particle Community-Wide CV results
## intercept RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 TRUE 5.153135 0.7560751 3.868983 2.056956 0.2676419 1.70888
################### Simpson's Evenness ###################
lm_prod_vs_simpseven_PA <- lm(frac_bacprod ~ mean, data = filter(simpseven, fraction == "Particle"))
cv_lm_prod_vs_simpseven_PA <- train(
frac_bacprod ~ mean, data = filter(simpseven, fraction == "Particle"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_prod_vs_simpseven_PA$results # Particle Community-Wide CV results
## intercept RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 TRUE 6.210928 0.6561583 4.828706 3.029085 0.3026097 2.361936
#################################### Per-Capita Production ####################################
################### Richness ###################
lm_percell_prod_vs_rich_PA <- lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness, fraction == "Particle"))
summary(lm_percell_prod_vs_rich_PA)
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5548 -0.2142 -0.0078 0.1246 0.5942
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.055248 0.372497 -21.625 4.55e-09 ***
## mean 0.002368 0.000636 3.723 0.00475 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.352 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6064, Adjusted R-squared: 0.5626
## F-statistic: 13.86 on 1 and 9 DF, p-value: 0.004746
# Dummy variable regression
summary(lm(log10(fracprod_per_cell_noinf) ~ mean + frac_boolean,
data = rich_test_df))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean + frac_boolean,
## data = rich_test_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.73738 -0.19725 -0.01448 0.24657 0.63488
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.3263959 0.2235397 -37.248 < 2e-16 ***
## mean 0.0022216 0.0005838 3.805 0.00111 **
## frac_boolean 0.3534155 0.1998678 1.768 0.09227 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3631 on 20 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6964, Adjusted R-squared: 0.6661
## F-statistic: 22.94 on 2 and 20 DF, p-value: 6.651e-06
PA_rich_percell_residplot <- plot_residuals(lm_percell_prod_vs_rich_PA, filter(richness, fraction == "Particle")$lm(log10(fracprod_per_cell_noinf)), "Log10(Cellular Production) vs Particle-Associated Richness")
## [1] "There are no high leverage points in this model."
## Error in xy.coords(x, y, xlabel, ylabel, log): attempt to apply non-function
cv_lm_percell_prod_vs_rich_PA <- train(
log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness, fraction == "Particle" & norep_filter_name != "MOTEJ515"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_percell_prod_vs_rich_PA$results # Particle Per-Capita CV results
## intercept RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 TRUE 0.4135688 0.6353932 0.3319425 0.1430362 0.3182312 0.127428
################### Shannon Entropy ###################
lm_percell_prod_vs_shannon_PA <- lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(shannon, fraction == "Particle"))
cv_lm_percell_prod_vs_shannon_PA <- train(
log10(fracprod_per_cell_noinf) ~ mean, data = filter(shannon, fraction == "Particle" & norep_filter_name != "MOTEJ515"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_percell_prod_vs_shannon_PA$results # Particle Per-Capita CV results
## intercept RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 TRUE 0.4698687 0.7031069 0.350353 0.1911322 0.3283053 0.1416735
################### Inverse Simpson ###################
lm_percell_prod_vs_invsimps_PA <- lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps, fraction == "Particle"))
summary(lm_percell_prod_vs_invsimps_PA)
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.28933 -0.18289 -0.11341 0.07465 0.56333
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.358363 0.159683 -46.08 5.34e-12 ***
## mean 0.018005 0.003758 4.79 0.000987 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2978 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.7183, Adjusted R-squared: 0.687
## F-statistic: 22.95 on 1 and 9 DF, p-value: 0.0009868
# Dummy variable regression
summary(lm(log10(fracprod_per_cell_noinf) ~ mean + frac_boolean, data = invsimps_test_df))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean + frac_boolean,
## data = invsimps_test_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.67981 -0.17066 -0.00109 0.13124 0.63544
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.01185 0.13640 -58.739 < 2e-16 ***
## mean 0.01815 0.00400 4.537 0.000201 ***
## frac_boolean 0.64856 0.14654 4.426 0.000260 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3347 on 20 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.7421, Adjusted R-squared: 0.7163
## F-statistic: 28.77 on 2 and 20 DF, p-value: 1.304e-06
PA_invsimps_percell_residplot <- plot_residuals(lm_percell_prod_vs_invsimps_PA, filter(invsimps, fraction == "Particle")$lm(log10(fracprod_per_cell_noinf)), "Log10(Cellular Production) vs Particle-Associated Inverse Simpson")
## [1] "There are no high leverage points in this model."
## Error in xy.coords(x, y, xlabel, ylabel, log): attempt to apply non-function
cv_lm_percell_prod_vs_invsimps_PA <- train(
log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps, fraction == "Particle" & norep_filter_name != "MOTEJ515"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_percell_prod_vs_invsimps_PA$results # Particle Per-Capita CV results
## intercept RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 TRUE 0.3627277 0.7384826 0.2953794 0.1152849 0.3120322 0.09661281
################### Simpson's Evenness ###################
lm_percell_prod_vs_simpseven_PA <- lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(simpseven, fraction == "Particle"))
cv_lm_percell_prod_vs_simpseven_PA <- train(
log10(fracprod_per_cell_noinf) ~ mean, data = filter(simpseven, fraction == "Particle" & norep_filter_name != "MOTEJ515"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_percell_prod_vs_simpseven_PA$results # Particle Per-Capita CV results
## intercept RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 TRUE 0.4361836 0.6603424 0.3503975 0.1419614 0.3057268 0.1133807
Diversity distributions
rich_fraction_wilcox <- wilcox.test(mean ~ fraction, data = richness)
# Make a data frame to draw significance line in boxplot (visually calculated)
rich_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(920, 930, 930, 920)) # WholePart vs WholeFree
rich_vert_distribution_plot <-
ggplot(richness, aes(y = mean, x = fraction)) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(150,980), breaks = seq(from = 0, to =925, by = 150)) +
ylab("Observed Richness") + xlab("Fraction") +
scale_shape_manual(values = season_shapes) +
geom_path(data = rich_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=980, size = 8, color = "#424645",
label= paste("***")) +
annotate("text", x=1.5, y=880, size = 4, color = "#424645",
label= paste("p =", round(rich_fraction_wilcox$p.value, digits = 5))) +
theme(legend.position = "none", axis.title.x = element_blank())
######################################################### EXPONENTIAL SHANNON #########################################################
exp_shannon_fraction_wilcox <- wilcox.test(mean ~ fraction, data = shannon)
# Make a data frame to draw significance line in boxplot (visually calculated)
exp_shannon_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(310,315,315,310)) # WholePart vs WholeFree
exp_shannon_vert_distribution_plot <-
ggplot(shannon, aes(y = mean, x = fraction)) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(0,325), breaks = seq(from = 0, to =300, by = 50)) +
ylab("Exp(Shannon)") + xlab("Fraction") +
scale_shape_manual(values = season_shapes) +
geom_path(data = exp_shannon_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=325, size = 8, color = "#424645",
label= paste("**")) +
annotate("text", x=1.5, y=300, size = 4, color = "#424645",
label= paste("p =", round(exp_shannon_fraction_wilcox$p.value, digits = 3))) +
theme(legend.position = "none", axis.title.x = element_blank())
######################################################### INVERSE SIMPSON #########################################################
invsimps_fraction_wilcox <- wilcox.test(mean ~ fraction, data = invsimps)
# Make a data frame to draw significance line in boxplot (visually calculated)
invsimps_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(83,85,85,83)) # WholePart vs WholeFree
invsimps_vert_distribution_plot <-
ggplot(invsimps, aes(y = mean, x = fraction)) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) +
ylab("Inverse Simpson") + xlab("Fraction") +
geom_path(data = invsimps_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=80, size = 4, color = "#424645", label= "NS") +
theme(legend.position = "none", axis.title.x = element_blank())
pdiv1 <- plot_grid(rich_vert_distribution_plot, exp_shannon_vert_distribution_plot, invsimps_vert_distribution_plot,
ncol = 3, nrow = 1, labels = c("A", "B", "C"))
pdiv1_boxplots <- plot_grid(pdiv1, season_legend,
ncol = 1, nrow = 2,
rel_heights = c(1, 0.05)); pdiv1_boxplots
pdiv2 <- plot_grid(rich_vert_distribution_plot + theme_minimal() +
theme(legend.position = "none", axis.title.x = element_blank()),
exp_shannon_vert_distribution_plot + theme_minimal() +
theme(legend.position = "none", axis.title.x = element_blank()),
invsimps_vert_distribution_plot + theme_minimal() +
theme(legend.position = "none", axis.title.x = element_blank()),
ncol = 3, nrow = 1, labels = c("A", "B", "C"));
pdiv2_boxplots <- plot_grid(pdiv2, season_legend,
ncol = 1, nrow = 2,
rel_heights = c(1, 0.05)); pdiv2_boxplots
Prepare Figure 2
######################################################### OBSERVED RICHNESS #########################################################
rich_fraction_wilcox <- wilcox.test(mean ~ fraction, data = richness)
rich_fraction_wilcox
##
## Wilcoxon rank sum test
##
## data: mean by fraction
## W = 129, p-value = 0.0004955
## alternative hypothesis: true location shift is not equal to 0
filter(richness) %>%
group_by(fraction) %>%
summarize(mean(mean), sd(mean), min(mean), max(mean))
## # A tibble: 2 x 5
## fraction `mean(mean)` `sd(mean)` `min(mean)` `max(mean)`
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 Particle 557. 168. 344. 909.
## 2 Free 338. 85.5 239. 510.
# Make a data frame to draw significance line in boxplot (visually calculated)
rich_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(920, 930, 930, 920)) # WholePart vs WholeFree
rich_distribution_plot <-
ggplot(richness, aes(y = mean, x = fraction)) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(150,980), breaks = seq(from = 0, to =925, by = 150)) +
xlab("Observed Richness") + xlab("Fraction") +
scale_shape_manual(values = season_shapes) +
geom_path(data = rich_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=980, size = 8, color = "#424645", label= paste("***"), angle = 90) +
annotate("text", x=1.5, y=790, size = 4, color = "#424645",
label= paste("p =", round(rich_fraction_wilcox$p.value, digits = 5))) +
theme(legend.position = "none",# axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
################ Richness vs Community-wide (Per-Liter) Production
## 1. Extract the R2 and p-value from the linear model:
lm_lab_perliter_rich_PA <- paste("atop(R^2 ==", round(summary(lm_prod_vs_rich_PA)$adj.r.squared, digits = 2), ",",
"p ==", round(unname(summary(lm_prod_vs_rich_PA)$coefficients[,4][2]), digits = 3), ")")
## 2. Plot Richness vs Community-wide (Per-Liter) Production
prod_vs_rich_plot <-
ggplot(richness, aes(x=mean, y=frac_bacprod)) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) + # Y-axis errorbars
geom_point(size = 3.5, color = "black", aes(fill = fraction, shape = season)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
ylab("\n Community Production \n (μgC/L/day)") +
xlab("Observed Richness") +
geom_smooth(data=filter(richness, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(150,980), breaks = seq(from = 0, to =925, by = 150)) +
# Add the R2 and p-value to the plot
annotate("text", x=790, y=45, label=lm_lab_perliter_rich_PA, parse = TRUE, color = "#FF6600", size = 4) +
theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank())
# Summary stats
summary(lm(frac_bacprod ~ mean, data = filter(richness, fraction == "Particle")))
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(richness, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.1446 -2.8561 -0.4608 3.7354 9.9395
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -11.167033 5.731339 -1.948 0.07996 .
## mean 0.037941 0.009891 3.836 0.00329 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.499 on 10 degrees of freedom
## Multiple R-squared: 0.5954, Adjusted R-squared: 0.5549
## F-statistic: 14.72 on 1 and 10 DF, p-value: 0.003286
# WITHOUT THE TWO HIGH POINTS
summary(lm(frac_bacprod ~ mean, data = filter(richness, fraction == "Particle" & mean < 800)))
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(richness, fraction ==
## "Particle" & mean < 800))
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.7760 -1.6847 -0.6523 2.8624 6.3591
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.43520 8.53971 -0.636 0.542
## mean 0.02559 0.01708 1.498 0.172
##
## Residual standard error: 4.365 on 8 degrees of freedom
## Multiple R-squared: 0.2191, Adjusted R-squared: 0.1215
## F-statistic: 2.244 on 1 and 8 DF, p-value: 0.1725
################ Richness vs Per Capita (Per-Cell) Production
## 1. Extract the R2 and p-value from the linear model:
lm_lab_percell_rich_PA <- paste("atop(R^2 ==", round(summary(lm_percell_prod_vs_rich_PA)$adj.r.squared, digits = 2), ",",
"p ==", round(unname(summary(lm_percell_prod_vs_rich_PA)$coefficients[,4][2]), digits = 3), ")")
## 2. Plot Richness vs Per Capita (Per-Cell) Production
percell_prod_vs_rich_plot <-
ggplot(richness, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, color = "black", aes(fill = fraction, shape = season)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
ylab("\n log10(Per-Capita Production)\n (μgC/cell/day)") +
xlab("Observed Richness") +
geom_smooth(data=filter(richness, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(150,980), breaks = seq(from = 0, to =925, by = 150)) +
#scale_y_continuous(limits = c(-8e-7,5e-6), breaks = seq(from = 0, to = 6e-7, by = 3e-7)) +
# Add the R2 and p-value to the plot
annotate("text", x=790, y=-7.5, label=lm_lab_percell_rich_PA, parse = TRUE, color = "#FF6600", size = 4) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
# Summary stats
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness, fraction == "Particle")))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5548 -0.2142 -0.0078 0.1246 0.5942
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.055248 0.372497 -21.625 4.55e-09 ***
## mean 0.002368 0.000636 3.723 0.00475 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.352 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6064, Adjusted R-squared: 0.5626
## F-statistic: 13.86 on 1 and 9 DF, p-value: 0.004746
# WITHOUT THE TWO HIGH POINTS
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness, fraction == "Particle" & mean < 800)))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness,
## fraction == "Particle" & mean < 800))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.39370 -0.18850 -0.00462 0.16877 0.42371
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.3565895 0.5415567 -13.584 2.76e-06 ***
## mean 0.0008726 0.0010848 0.804 0.448
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2768 on 7 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.08462, Adjusted R-squared: -0.04615
## F-statistic: 0.6471 on 1 and 7 DF, p-value: 0.4476
######################################################### SHANNON ENTROPY #########################################################
shannon_fraction_wilcox <- wilcox.test(mean ~ fraction, data = shannon)
shannon_fraction_wilcox
##
## Wilcoxon rank sum test
##
## data: mean by fraction
## W = 119, p-value = 0.00556
## alternative hypothesis: true location shift is not equal to 0
filter(shannon) %>%
group_by(fraction) %>%
summarize(mean(mean), sd(mean))
## # A tibble: 2 x 3
## fraction `mean(mean)` `sd(mean)`
## <fct> <dbl> <dbl>
## 1 Particle 109. 69.6
## 2 Free 55.9 17.0
# Make a data frame to draw significance line in boxplot (visually calculated)
shannon_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(310, 315, 315, 310)) # WholePart vs WholeFree
shannon_distribution_plot <-
ggplot(shannon, aes(y = mean, x = fraction)) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, aes(shape = season, fill = fraction), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(0,325), breaks = seq(from = 0, to = 300, by = 50)) +
xlab("Exp(Shannon)") + xlab("Fraction") +
scale_shape_manual(values = season_shapes) +
# Add line and pval
geom_path(data = shannon_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=325, size = 8, color = "#424645", label= paste("**"), angle = 90) +
annotate("text", x=1.5, y=270, size = 4, color = "#424645",
label= paste("p =", round(shannon_fraction_wilcox$p.value, digits = 3))) +
theme(legend.position = "none",# axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
################ Shannon vs Community-wide (Per-Liter) Production
## 1. Extract the R2 and p-value from the linear model:
lm_lab_perliter_shannon_PA <- paste("atop(R^2 ==", round(summary(lm_prod_vs_shannon_PA)$adj.r.squared, digits = 2), ",",
"p ==", round(unname(summary(lm_prod_vs_shannon_PA)$coefficients[,4][2]), digits = 3), ")")
## 2. Plot Shannon vs Community-wide (Per-Liter) Production
prod_vs_shannon_plot <-
ggplot(shannon, aes(x=mean, y=frac_bacprod)) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) + # Y-axis errorbars
geom_point(size = 3.5, color = "black", aes(fill = fraction, shape = season)) +
scale_color_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
scale_fill_manual(values = fraction_colors) +
ylab("\n Community Production \n (μgC/L/day)") +
scale_x_continuous(limits = c(0,325), breaks = seq(from = 0, to = 300, by = 50)) +
xlab("Exp(Shannon)") +
geom_smooth(data=filter(shannon, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
# Add the R2 and p-value to the plot
annotate("text", x=250, y=45, label=lm_lab_perliter_shannon_PA, parse = TRUE, color = "#FF6600", size = 4) +
theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank())
################ Shannon vs Per Capitra (Per-Cell) Production
## 1. Extract the R2 and p-value from the linear model:
lm_lab_percell_shannon_PA <- paste("atop(R^2 ==", round(summary(lm_percell_prod_vs_shannon_PA)$adj.r.squared, digits = 2), ",",
"p ==", round(unname(summary(lm_percell_prod_vs_shannon_PA)$coefficients[,4][2]), digits = 3), ")")
## 2. Plot Shannon vs Per Capitra (Per-Cell) Production
percell_prod_vs_shannon_plot <-
ggplot(shannon, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, color = "black", aes(fill = fraction, shape = season)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
ylab("\n log10(Per-Capita Production)\n (μgC/cell/day)") +
xlab("Exp(Shannon)") +
geom_smooth(data=filter(shannon, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(0,325), breaks = seq(from = 0, to = 300, by = 50)) +
#scale_y_continuous(limits = c(-8e-7,5e-6), breaks = seq(from = 0, to = 6e-7, by = 3e-7)) +
# Add the R2 and p-value to the plot
annotate("text", x=250, y=-7.5, label=lm_lab_percell_shannon_PA, parse = TRUE, color = "#FF6600", size = 4) +
theme(legend.title = element_blank(), legend.position = "none")
shannon_plots <- plot_grid(shannon_distribution_plot, prod_vs_shannon_plot, percell_prod_vs_shannon_plot,
#labels = c("B", "D", "F"),
ncol = 1, nrow = 3,
rel_heights = c(0.5, 0.8, 1.1),
align = "v")
######################################################### INVERSE SIMPSON #########################################################
invsimps_fraction_wilcox <- wilcox.test(mean ~ fraction, data = invsimps)
invsimps_fraction_wilcox
##
## Wilcoxon rank sum test
##
## data: mean by fraction
## W = 80, p-value = 0.6707
## alternative hypothesis: true location shift is not equal to 0
filter(invsimps) %>%
group_by(fraction) %>%
summarize(mean(mean), sd(mean))
## # A tibble: 2 x 3
## fraction `mean(mean)` `sd(mean)`
## <fct> <dbl> <dbl>
## 1 Particle 35.5 23.9
## 2 Free 24.1 8.11
# Make a data frame to draw significance line in boxplot (visually calculated)
invsimps_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(83,85,85,83)) # WholePart vs WholeFree
invsimps_distribution_plot <-
ggplot(invsimps, aes(y = mean, x = fraction)) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) +
ylab("Inverse Simpson") + xlab("Fraction") +
geom_path(data = invsimps_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=80, size = 4, color = "#424645", label= "NS") +
theme(legend.position = "none", #axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
################ Inverse Simpson vs Community-wide (Per-Liter) Production
## 1. Extract the R2 and p-value from the linear model:
lm_lab_perliter_invsimps_PA <- paste("atop(R^2 ==", round(summary(lm_prod_vs_invsimps_PA)$adj.r.squared, digits = 2), ",",
"p ==", round(unname(summary(lm_prod_vs_invsimps_PA)$coefficients[,4][2]), digits = 4), ")")
## 2. Plot Inverse Simpson vs Community-wide (Per-Liter) Production
prod_vs_invsimps_plot <-
ggplot(invsimps, aes(x=mean, y=frac_bacprod)) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) + # Y-axis errorbars
geom_point(size = 3.5, color = "black", aes(fill = fraction, shape = season)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
ylab("Community Production \n (μgC/L/day)") +
xlab("Inverse Simpson") +
geom_smooth(data=filter(invsimps, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) +
# Add the R2 and p-value to the plot
annotate("text", x=70, y=45, label=lm_lab_perliter_invsimps_PA, parse = TRUE, color = "#FF6600", size = 4) +
theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank())
# Summary stats
summary(lm(frac_bacprod ~ mean, data = filter(invsimps, fraction == "Particle")))
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(invsimps, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.5101 -2.0966 -0.1789 0.8421 7.8801
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.42551 2.43457 -0.175 0.864742
## mean 0.29241 0.05761 5.076 0.000481 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.572 on 10 degrees of freedom
## Multiple R-squared: 0.7204, Adjusted R-squared: 0.6924
## F-statistic: 25.76 on 1 and 10 DF, p-value: 0.0004809
# WITHOUT THE TWO HIGH POINTS
summary(lm(frac_bacprod ~ mean, data = filter(invsimps, fraction == "Particle" & mean < 70)))
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(invsimps, fraction ==
## "Particle" & mean < 70))
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.2949 -1.6336 -0.3733 1.4762 6.5363
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.6287 2.4758 0.658 0.5291
## mean 0.2050 0.0806 2.543 0.0345 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.672 on 8 degrees of freedom
## Multiple R-squared: 0.4471, Adjusted R-squared: 0.378
## F-statistic: 6.469 on 1 and 8 DF, p-value: 0.03452
################ Inverse Simpson vs Per Capitra (Per-Cell) Production
## 1. Extract the R2 and p-value from the linear model:
lm_lab_percell_invsimps_PA <- paste("atop(R^2 ==", round(summary(lm_percell_prod_vs_invsimps_PA)$adj.r.squared, digits = 2), ",",
"p ==", round(unname(summary(lm_percell_prod_vs_invsimps_PA)$coefficients[,4][2]), digits = 4), ")")
## 2. Plot Inverse Simpson vs Per Capitra (Per-Cell) Production
percell_prod_vs_invsimps_plot <-
ggplot(invsimps, aes(x=mean, y=log10(fracprod_per_cell_noinf), fill = fraction, color = fraction)) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, color = "black", aes(shape = season)) +
scale_color_manual(values = fraction_colors, guide = TRUE) +
scale_fill_manual(values = fraction_colors, guide = FALSE) +
scale_shape_manual(values = season_shapes) +
ylab("log10(production/cell)\n (μgC/cell/day)") +
xlab("Inverse Simpson") +
geom_smooth(data=filter(invsimps, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) +
#scale_y_continuous(limits = c(-8e-7,5e-6), breaks = seq(from = 0, to = 6e-7, by = 3e-7)) +
# Add the R2 and p-value to the plot
annotate("text", x=70, y=-7.5, label=lm_lab_percell_invsimps_PA, parse = TRUE, color = "#FF6600", size = 4) +
guides(color = guide_legend(override.aes = list(shape = 15))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
# Summary stats
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps, fraction == "Particle")))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.28933 -0.18289 -0.11341 0.07465 0.56333
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.358363 0.159683 -46.08 5.34e-12 ***
## mean 0.018005 0.003758 4.79 0.000987 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2978 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.7183, Adjusted R-squared: 0.687
## F-statistic: 22.95 on 1 and 9 DF, p-value: 0.0009868
# WITHOUT THE TWO HIGH POINTS
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps, fraction == "Particle" & mean < 70)))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps,
## fraction == "Particle" & mean < 70))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.23904 -0.19052 -0.03487 0.06990 0.49055
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.172409 0.164716 -43.544 8.8e-10 ***
## mean 0.009523 0.005573 1.709 0.131
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.243 on 7 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.2944, Adjusted R-squared: 0.1936
## F-statistic: 2.92 on 1 and 7 DF, p-value: 0.1312
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps, fraction == "Particle" & mean < 70 & log10(fracprod_per_cell_noinf) < 1e-7)))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps,
## fraction == "Particle" & mean < 70 & log10(fracprod_per_cell_noinf) <
## 1e-07))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.23904 -0.19052 -0.03487 0.06990 0.49055
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.172409 0.164716 -43.544 8.8e-10 ***
## mean 0.009523 0.005573 1.709 0.131
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.243 on 7 degrees of freedom
## Multiple R-squared: 0.2944, Adjusted R-squared: 0.1936
## F-statistic: 2.92 on 1 and 7 DF, p-value: 0.1312
# Plot Inverse simpson plots altogether
invsimps_plots <- plot_grid(invsimps_distribution_plot, prod_vs_invsimps_plot, percell_prod_vs_invsimps_plot,
labels = c("B", "D", "F"), ncol = 1, nrow = 3,
rel_heights = c(0.5, 0.8, 1.1),
align = "v")